from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from time import time
import numpy as np
import matplotlib.pyplot as plt

# Load the Labeled Faces in the Wild dataset, keeping only people who have
# at least 70 pictures.
lfw = fetch_lfw_people(min_faces_per_person=70)

# Dimensions of the image stack.
n_samples, h, w = lfw.images.shape

# Feature matrix and target labels, plus the per-sample feature count.
X, y = lfw.data, lfw.target
n_features = X.shape[1]

# Seeded split so the train/test partition is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Sweep the number of principal components and, for each setting, record how
# long the PCA and SVC fits take and how the classifier scores on the
# held-out test set. Each list gets one entry per component count.
PCA_time = []
SVC_time = []
test_precision = []
test_recall = []
for n_eigenvectors in range(5, 300, 20):
    # Time only the PCA fit. random_state is pinned because PCA's default
    # solver selection can fall back to a randomized SVD; without a seed the
    # projection (and every score derived from it) would vary between runs
    # even though the train/test split itself is seeded.
    t0 = time()
    pca = PCA(n_components=n_eigenvectors, whiten=True,
              random_state=42).fit(X_train)
    PCA_time.append(time() - t0)

    # fit() and transform() are kept separate (rather than fit_transform) so
    # the projections are computed outside the timed region. Both splits are
    # projected with the basis learned from the training data only.
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Time construction and fitting of the RBF-kernel classifier.
    t0 = time()
    svc = SVC(C=1000, gamma=0.005, kernel='rbf', class_weight='balanced')
    svc = svc.fit(X_train_pca, y_train)
    SVC_time.append(time() - t0)

    y_pred_test = svc.predict(X_test_pca)

    # Weighted-average precision and recall over all classes.
    test_precision.append(
        precision_score(y_test, y_pred_test, average='weighted'))
    test_recall.append(
        recall_score(y_test, y_pred_test, average='weighted'))

# Plot fit times and test scores against the number of PCA components.
# The x values repeat the component counts iterated by the sweep.
x = range(5, 300, 20)
plt.figure(figsize=(8, 4))

# One (values, format string, legend label) triple per curve, drawn in order.
for values, fmt, label in (
        (PCA_time, '-y', 'PCA time'),
        (SVC_time, '-k', 'SVC time'),
        (test_precision, '--k', 'Precision'),
        (test_recall, '--o', 'Recall'),
):
    plt.plot(x, values, fmt, label=label)

plt.xlabel('#eigenvectors (n_components)')
plt.grid()
plt.legend()
plt.show()
